In [2]:
import sys
sys.setrecursionlimit(10000)
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

import os
os.environ['GNUMPY_IMPLICIT_CONVERSION'] = 'allow'
print os.environ.get('GNUMPY_IMPLICIT_CONVERSION')
allow
In [3]:
import cPickle
import gzip

from breze.learn.data import one_hot
from breze.learn.base import cast_array_to_local_type
from breze.learn.utils import tile_raster_images

import climin.stops
import climin.initialize
from climin import mathadapt as ma

from breze.learn import hvi
from breze.learn.hvi import HmcViModel
from breze.learn.hvi.energies import (NormalGaussKinEnergyMixin, DiagGaussKinEnergyMixin, MlpDiagGaussKinEnergyMixin)
from breze.learn.hvi.inversemodels import MlpGaussInvModelMixin

from matplotlib import pyplot as plt
from matplotlib import cm

import numpy as np

#import fasttsne

from IPython.html import widgets
%matplotlib inline

import theano
theano.config.compute_test_value = 'ignore'  # 'raise'
from theano import (tensor as T, clone)
Couldn't import dot_parser, loading of dot files will not be possible.
//anaconda/lib/python2.7/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)
In [4]:
datafile = '../mnist.pkl.gz'
# Load data.                                                                                                   

with gzip.open(datafile,'rb') as f:                                                                        
    train_set, val_set, test_set = cPickle.load(f)                                                       

X, Z = train_set                                                                                               
VX, VZ = val_set
TX, TZ = test_set

Z = one_hot(Z, 10)
VZ = one_hot(VZ, 10)
TZ = one_hot(TZ, 10)

X_no_bin = X
VX_no_bin = VX
TX_no_bin = TX

# binarize the MNIST data
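# Each grey value in [0, 1] is treated as the success probability of a Bernoulli draw.
# Validation and test sets are tiled five times first, so every image is binarized
# with five independent samples; the training set is binarized once.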
np.random.seed(0)
VX = np.random.binomial(1, np.tile(VX, (5, 1))) * 1.0
TX = np.random.binomial(1, np.tile(TX, (5, 1))) * 1.0
X  = np.random.binomial(1, X) * 1.0

print VX.shape

image_dims = 28, 28

X_np, Z_np, VX_np, VZ_np, TX_np, TZ_np, X_no_bin_np, VX_no_bin_np, TX_no_bin_np = X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin
X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin = [cast_array_to_local_type(i)
                                                        for i in (X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin)]
print X.shape
(50000, 784)
(50000, 784)
In [65]:
fig, ax = plt.subplots(figsize=(9, 9))

img = tile_raster_images(X_np[:64], image_dims, (8, 8), (1, 1))
ax.imshow(img, cmap=cm.binary)
Out[65]:
<matplotlib.image.AxesImage at 0x1224d7310>

Model setup

In [10]:
fast_dropout = False

if fast_dropout:
    class MyHmcViModel(HmcViModel, 
                   hvi.FastDropoutMlpBernoulliVisibleVAEMixin, 
                   hvi.FastDropoutMlpGaussLatentVAEMixin, 
                   MlpDiagGaussKinEnergyMixin,
                   MlpGaussInvModelMixin):
        pass

    kwargs = {
        'p_dropout_inpt': .1,
        'p_dropout_hiddens': [.2, .2],
    }

    print 'yeah'

else:
    class MyHmcViModel(HmcViModel, 
                   hvi.MlpBernoulliVisibleVAEMixin, 
                   hvi.MlpGaussLatentVAEMixin, 
                   DiagGaussKinEnergyMixin,  # MlpDiagGaussKinEnergyMixin, DiagGaussKinEnergyMixin
                   MlpGaussInvModelMixin):
        pass
    kwargs = {}


batch_size = 500
#optimizer = 'rmsprop', {'step_rate': 1e-4, 'momentum': 0.95, 'decay': .95, 'offset': 1e-6}
#optimizer = 'adam', {'step_rate': .5, 'momentum': 0.9, 'decay': .95, 'offset': 1e-6}
optimizer = 'adam', {'step_rate': 0.001}

# This is the number of random variables NOT the size of 
# the sufficient statistics for the random variables.
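# E.g. a diagonal Gaussian posterior over 20 latents has 40 sufficient statistics
# (a mean and a variance per dimension), but n_latents stays 20.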
n_latents = 20
n_hidden = 200

m = MyHmcViModel(X.shape[1], n_latents, 
                 [n_hidden, n_hidden], ['softplus'] * 2, 
                 [n_hidden, n_hidden], ['rectifier'] * 2, 
                 [n_hidden, n_hidden], ['rectifier'] * 2,
                 [n_hidden], ['rectifier'],
                 n_hmc_steps=3, n_lf_steps=4,
                 n_z_samples=1,
                 optimizer=optimizer, batch_size=batch_size,
                 allow_partial_velocity_update=False, perform_acceptance_step=False,
                 compute_transition_densities=True, consider_aux_inv_model_inpt_constant=False,
                 **kwargs)
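# n_hmc_steps=3 HMC transitions with n_lf_steps=4 leapfrog steps each refine the
# initial recognition sample; n_z_samples=1 draws a single latent sample per input.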

climin.initialize.randomize_normal(m.parameters.data, 0.1, 1e-1)
#m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.2)
#m.parameters.__setitem__(m.kin_energy.mlp.layers[-1].bias, 1)

Loading parameters

In [11]:
old_best_params = None
#print m.score(TX)
print m.parameters.data.shape
(616685,)
In [12]:
FILENAME = 'hvi_gen2_recog2_aux2_late20_hid200_pretrained.pkl'

#old_best_params = None
with open(FILENAME, 'rb') as f:
    np_array = cPickle.load(f)
old_best_params = cast_array_to_local_type(np_array)
print old_best_params.shape
(616685,)
In [13]:
m.parameters.data = old_best_params.copy()
old_best_loss = m.score(VX)
print old_best_loss
compiled score function
95.69007

Model performance

In [14]:
#print m.score(VX)
print m.score(TX)
print m.score(X[:50000])
94.88718
94.77127
In [ ]:
print m.parameters.view(m.init_recog.mlp.layers[2].bias)
In [ ]:
#m.parameters.__setitem__(m.hmc_sampler.step_size_param, m.parameters.view(m.hmc_sampler.step_size_param) * np.sqrt(4.0/2.0))
#m.parameters.__setitem__(m.init_recog.mlp.layers[2].bias, cast_array_to_local_type(np.array([ 0.14096579,  0.53084856, -1.75648117, -2.5653615 ])))
#m.parameters.__setitem__(m.kin_energy.variance_parameter, cast_array_to_local_type(-0.7*np.ones_like(m.kin_energy.variance_parameter)))
In [15]:
print 0.1 * m.parameters.view(m.hmc_sampler.step_size_param) ** 2 + 1e-8
[ 0.06400001]
In [47]:
print m.parameters.view(m.hmc_sampler.partial_velocity_parameter)
print np.tanh(m.parameters.view(m.hmc_sampler.partial_velocity_parameter))
[ 0.48037896]
[ 0.44654706]
In [61]:
print m.estimate_nll(TX, 100)
128.695835445
In [ ]:
m._f_loss, m._f_dloss = m._make_loss_functions()
In [ ]:
if False:
    full_f_dloss = m._f_dloss
    only_aux_inv_mask = np.zeros_like(m.parameters.data)
    for i in range(3):
        auxiliary_inv_weight_param_range = m.parameters._var_to_slice[m.auxiliary_inv_model.mlp.layers[i].weights]
        auxiliary_inv_bias_param_range = m.parameters._var_to_slice[m.auxiliary_inv_model.mlp.layers[i].bias]
        print auxiliary_inv_weight_param_range, auxiliary_inv_bias_param_range
        only_aux_inv_mask[slice(*auxiliary_inv_weight_param_range)] = 1.0
        only_aux_inv_mask[slice(*auxiliary_inv_bias_param_range)] = 1.0

    def only_aux_inv_dloss(params, data, *args):
        # mask the full gradient so that only the auxiliary inverse model parameters are updated
        return cast_array_to_local_type(only_aux_inv_mask) * full_f_dloss(params, data, *args)

    m._f_dloss = only_aux_inv_dloss
In [ ]:
from theano.printing import debugprint
#debugprint(m._f_dloss.theano_func)
In [ ]:
print m.parameters._var_to_shape
print
print m.parameters._var_to_slice
In [ ]:
grad_array = np.zeros((100, len(m.parameters.data)))
for i in range(100):
    grad_array[i, :] = m._f_dloss(m.parameters.data, X[:625])
In [ ]:
mean_grad = grad_array.mean(axis=0)
print abs(mean_grad).mean()
print mean_grad.min(), mean_grad.argmin()
print mean_grad.max(), mean_grad.argmax()

# 0.0068310414746
# -8.18554486275 196327
# 14.5694446664 197230
In [ ]:
x = (abs(mean_grad) > 5).astype('int32')
matching_indices = [i for i, value in enumerate(x) if value == 1]
print matching_indices
# [196327, 197226, 197230, 197270, 197450, 197566, 197750, 197862, 197866, 197938]
In [ ]:
std_grad = grad_array.std(axis=0)
print std_grad.mean()
print std_grad.min(), std_grad.argmin()
print std_grad.max(), std_grad.argmax()
print mean_grad[std_grad.argmax()]

# 0.0173799498656
# 0.0 2
# 179.490609337 197230
# 14.5694446664
In [ ]:
print (abs(mean_grad) > 1).sum()
print (abs(mean_grad) > 2).sum()
print (abs(mean_grad) > 10).sum()
# 310
# 55
# 2
In [ ]:
consconst_mean_grad = mean_grad

Model training

In [ ]:
TARGET_FILENAME = 'hvi_gen2_recog2_aux2_late2_hid200_pretr_3hmc_04lf_np_BEST_SO_FAR_with_consider_constant'
FILETYPE_EXTENSION = '.pkl'
old_best_params = None

m.optimizer = 'adam', {'step_rate': 0.0001}

max_passes = 200
max_iter = max_passes * X.shape[0] / batch_size
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

# theano.config.optimizer = 'fast_compile'

for i, info in enumerate(m.powerfit((X_no_bin,), (VX,), stop, pause, eval_train_loss=False)):
    print i, m.score(X[:10000]).astype('float32'), info['val_loss'], \
        np.exp(m.parameters.view(m.kin_energy.variance_parameter).as_numpy_array()), \
        0.1 * m.parameters.view(m.hmc_sampler.step_size_param).as_numpy_array() ** 2 + 1e-8
    if i == 0 and old_best_params is not None:
        if info['best_loss'] > old_best_loss:
            info['best_loss'] = old_best_loss
            info['best_pars'] = old_best_params
    
    if info['best_loss'] == info['val_loss']:
        with open(TARGET_FILENAME + FILETYPE_EXTENSION, 'wb') as f:
            cPickle.dump(m.parameters.data, f, protocol=cPickle.HIGHEST_PROTOCOL)
In [ ]:
train_result_params = m.parameters.data.copy()
#m.parameters.data = info['best_pars']
#m.score(VX)
In [ ]:
m.parameters.data = train_result_params

Function definitions

In [16]:
f_z_init_sample = m.function(['inpt'], m.init_recog.sample(), numpy_result=True)
f_z_sample = m.function(['inpt'], m.hmc_sampler.output, numpy_result=True)
f_gen = m.function([m.gen.inpt], m.gen.sample(), numpy_result=True)
f_gen_rate = m.function([m.gen.inpt], m.gen.rate, numpy_result=True)
f_joint_nll = m.function(['inpt'], m.joint_nll, numpy_result=True)
In [17]:
curr_pos = T.matrix('current_position')
curr_vel = T.matrix('current_velocity')
norm_noise = T.matrix('normal_noise')
unif_noise = T.vector('uniform_noise')

new_sampled_vel = m.hmc_sampler.kin_energy.sample(norm_noise)
updated_vel = m.hmc_sampler.partial_vel_constant * curr_vel + m.hmc_sampler.partial_vel_complement * new_sampled_vel
performed_hmc_steps = m.hmc_sampler.perform_hmc_steps(curr_pos, curr_vel)
hmc_step = m.hmc_sampler.hmc_step(curr_pos, curr_vel, np.float32(0), norm_noise, unif_noise)
lf_step_results = m.hmc_sampler.simulate_dynamics(curr_pos, curr_vel, return_full_list=True)

f_pot_en = m.function(['inpt', curr_pos], m.hmc_sampler.eval_pot_energy(curr_pos), numpy_result=True)
f_kin_en = m.function(['inpt', curr_vel], m.kin_energy.nll(curr_vel).sum(-1), numpy_result=True)
f_perform_hmc_steps = m.function(['inpt', curr_pos, curr_vel], 
                                T.concatenate([performed_hmc_steps[0], performed_hmc_steps[1]], axis=1))
f_hmc_step = m.function(['inpt', curr_pos, curr_vel, norm_noise, unif_noise], 
                        T.concatenate([hmc_step[0], hmc_step[1]],axis=1), on_unused_input='warn')
f_kin_energy_sample_from_noise = m.function(['inpt', norm_noise], new_sampled_vel)
f_updated_vel_from_noise = m.function(['inpt', curr_vel, norm_noise], updated_vel)
f_perform_lf_steps = m.function(['inpt', curr_pos, curr_vel],
                               T.concatenate([lf_step_results[0], lf_step_results[1]], axis=0))
/Users/christopher/Documents/Masterarbeit/myPython/breze/breze/arch/util.py:630: UserWarning: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 5 is not part of the computational graph needed to compute the outputs: uniform_noise.
To make this warning into an error, you can pass the parameter on_unused_input='raise' to theano.function. To disable it completely, use on_unused_input='ignore'.
  on_unused_input=on_unused_input, updates=updates)
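
For orientation: the leapfrog update that simulate_dynamics integrates can be sketched in a few lines of NumPy. This is a minimal illustration assuming an identity mass matrix and a generic potential-gradient function grad_pot (both hypothetical simplifications); the model's kin_energy and learned step size generalize the velocity half-steps.

import numpy as np

def leapfrog_step(pos, vel, grad_pot, step_size):
    # half-step on the velocity, full step on the position, half-step on the velocity
    vel = vel - 0.5 * step_size * grad_pot(pos)
    pos = pos + step_size * vel
    vel = vel - 0.5 * step_size * grad_pot(pos)
    return pos, vel

# toy check with a standard Gaussian potential U(q) = 0.5 * q ** 2, so grad U(q) = q
pos, vel = np.ones(2), np.zeros(2)
for _ in range(4):
    pos, vel = leapfrog_step(pos, vel, lambda q: q, step_size=0.1)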
In [ ]:
z1_by_z0_0 = T.grad(lf_step_results[0][-1, 0, 0], curr_pos)
z1_by_z0_1 = T.grad(lf_step_results[0][-1, 0, 1], curr_pos)
z1_by_z0 = T.concatenate([z1_by_z0_0, z1_by_z0_1], axis=0)
f_z1_by_z0 = m.function(['inpt', curr_pos, curr_vel], z1_by_z0, numpy_result=True, on_unused_input='warn')

zT_by_z0_0 = T.grad(performed_hmc_steps[0][-1, 0, 0], curr_pos)
zT_by_z0_1 = T.grad(performed_hmc_steps[0][-1, 0, 1], curr_pos)
zT_by_z0 = T.concatenate([zT_by_z0_0, zT_by_z0_1], axis=0)
f_zT_by_z0 = m.function(['inpt', curr_pos, curr_vel], zT_by_z0, numpy_result=True, on_unused_input='warn')
In [18]:
f_z_init_mean = m.function(['inpt'], m.init_recog.mean, numpy_result=True)
f_z_init_var = m.function(['inpt'], m.init_recog.var, numpy_result=True)

f_v_init_var = m.function(['inpt'], T.extra_ops.cpu_contiguous(m.kin_energy.var), numpy_result=True)

full_sample = m.hmc_sampler.sample_with_path()
f_full_sample = m.function(['inpt'], T.concatenate([full_sample[0], full_sample[1]], axis=1))
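# positions and velocities are stacked along the batch axis (axis 1): for a batch of
# n inputs the first n rows of each step are z samples and the remaining n rows are
# v samples; the cells below split the result accordingly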
In [20]:
pos = T.matrix('pos')
vel = T.matrix('vel')
updated_vel = T.matrix('updated_vel')
time_step = T.vector('time_step')

aux_inpt_replacements = {m.auxiliary_inv_model_inpt['position']: pos, 
                         m.auxiliary_inv_model_inpt['time']: T.cast(time_step[0], dtype='float32')}
if 'velocity' in m.auxiliary_inv_model_inpt:  # only use the updated velocity if it is part of the model
    aux_inpt_replacements.update({m.auxiliary_inv_model_inpt['velocity']: updated_vel})

aux_inv_model_var = clone(m.auxiliary_inv_model.var, replace=aux_inpt_replacements)
aux_inv_model_mean = clone(m.auxiliary_inv_model.mean, replace=aux_inpt_replacements)
aux_inv_model_nll = clone(m.auxiliary_inv_model.nll(vel).sum(-1), replace=aux_inpt_replacements)

if 'velocity' not in m.auxiliary_inv_model_inpt:
    f_aux_inv_var = m.function(['inpt', pos, time_step], aux_inv_model_var, numpy_result=True)
    f_aux_inv_mean = m.function(['inpt', pos, time_step], aux_inv_model_mean, numpy_result=True)
    f_aux_inv_nll = m.function(['inpt', pos, time_step, vel], aux_inv_model_nll, numpy_result=True)
else:
    f_aux_inv_var = m.function(['inpt', pos, updated_vel, time_step], aux_inv_model_var, numpy_result=True)
    f_aux_inv_mean = m.function(['inpt', pos, updated_vel, time_step], aux_inv_model_mean, numpy_result=True)
    f_aux_inv_nll = m.function(['inpt', pos, updated_vel, time_step, vel], aux_inv_model_nll, numpy_result=True)
        
final_vel_inpt_replacements = {m.final_vel_model_inpt['position']: pos, 
                               m.final_vel_model_inpt['time']: T.cast(m.n_hmc_steps, dtype='float32')}

final_vel_model_var = clone(m.final_vel_model.var, replace=final_vel_inpt_replacements)
final_vel_model_mean = clone(m.final_vel_model.mean, replace=final_vel_inpt_replacements)
final_vel_model_nll = clone(m.final_vel_model.nll(vel).sum(-1), replace=final_vel_inpt_replacements)

f_v_final_var = m.function(['inpt', pos], final_vel_model_var, numpy_result=True)
f_v_final_mean = m.function(['inpt', pos], final_vel_model_mean, numpy_result=True)
f_v_final_model_nll = m.function(['inpt', pos, vel], final_vel_model_nll, numpy_result=True)

f_kin_energy_nll = m.function(['inpt'], m.kin_energy.expected_nll, numpy_result=True)
In [19]:
f_init_recog_nll = m.function(['inpt'], m.init_recog.expected_nll.sum(-1), numpy_result=True)
In [ ]:
pos = T.matrix()
f_pot_en_deriv = m.function(['inpt', pos], m.hmc_sampler.eval_pot_energy_deriv(pos))
f_pot_en_2nd_deriv0 = m.function(['inpt', m.hmc_sampler.pot_energy_inpt], T.grad(m.hmc_sampler.pot_energy_deriv[0, 0], m.hmc_sampler.pot_energy_inpt))
f_pot_en_2nd_deriv1 = m.function(['inpt', m.hmc_sampler.pot_energy_inpt], T.grad(m.hmc_sampler.pot_energy_deriv[0, 1], m.hmc_sampler.pot_energy_inpt))
In [20]:
print f_init_recog_nll(VX).mean()
init_var = f_z_init_var(VX)
print init_var.mean()
print init_var.max()
print init_var.min()
print
print f_joint_nll(TX).mean()
1.97692
0.140241
1.1181
0.00239332

94.6173

Visualizations

Comparison of initial recognition model with full HVI model on training samples

Reconstruction of training samples

In [21]:
fig, axs = plt.subplots(2, 3, figsize=(27, 18))

### Original data

O = (X_no_bin_np[:64])[:, :784].astype('float32')
img = tile_raster_images(O, image_dims, (8, 8), (1, 1))
axs[0, 0].imshow(img, cmap=cm.binary)

O2 = (X_np[:64])[:, :784].astype('float32')
img = tile_raster_images(O2, image_dims, (8, 8), (1, 1))
axs[1, 0].imshow(img, cmap=cm.binary)

### Reconstruction

#z_sample = f_z_sample((X[:64]))
z_init_sample = cast_array_to_local_type(f_z_init_sample((X[:64])))
z_sample = f_perform_hmc_steps((X[:64]), 
                               z_init_sample, 
                               f_kin_energy_sample_from_noise((X[:64]), 
                                                              cast_array_to_local_type(np.random.normal(size=(64, m.n_latent)).astype('float32')))
                               )[-1, :64, :]

R = f_gen_rate(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R, image_dims, (8, 8), (1, 1))
axs[0, 1].imshow(img, cmap=cm.binary)

Rinit = f_gen_rate(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit, image_dims, (8, 8), (1, 1))
axs[0, 2].imshow(img, cmap=cm.binary)

R2 = f_gen(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R2, image_dims, (8, 8), (1, 1))
axs[1, 1].imshow(img, cmap=cm.binary)

Rinit2 = f_gen(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit2, image_dims, (8, 8), (1, 1))
axs[1, 2].imshow(img, cmap=cm.binary)
Out[21]:
<matplotlib.image.AxesImage at 0x11db7cd50>

Generating new data samples

In [22]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))

prior_sample = cast_array_to_local_type(np.random.randn(64, m.n_latent))

S = f_gen_rate(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
axs[0].imshow(img, cmap=cm.binary)

S2 = f_gen(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S2, image_dims, (8, 8), (1, 1))
axs[1].imshow(img, cmap=cm.binary)

#S3 = f_gen_rate(prior_sample)[:, :784].astype('float32')
#img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
#axs[2, 2].imshow(img, cmap=cm.nipy_spectral)
Out[22]:
<matplotlib.image.AxesImage at 0x11b83e8d0>

Learned data manifold

In [23]:
dim1 = 0
dim2 = 1
In [24]:
from scipy.stats import norm as normal_distribution

unit_interval_positions = np.linspace(0.025, 0.975, 20)
positions = normal_distribution.ppf(unit_interval_positions)
print unit_interval_positions
print positions
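# pushing an equally spaced grid on (0, 1) through the inverse normal CDF (ppf) yields
# latent grid points that cover equal probability mass under the N(0, 1) prior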

latent_array = np.zeros((400, m.n_latent))
latent_array[:, dim2] = -np.repeat(positions, 20)  # because images are filled top -> bottom, left -> right (row by row)
latent_array[:, dim1] = np.tile(positions, 20)
        
fig, axs = plt.subplots(1, 1, figsize=(24, 24))

F = f_gen_rate(cast_array_to_local_type(latent_array)).astype('float32')

img = tile_raster_images(F, image_dims, (20, 20), (1, 1))
#axs.imshow(img, cmap=cm.nipy_spectral)
axs.imshow(img, cmap=cm.binary)
[ 0.025  0.075  0.125  0.175  0.225  0.275  0.325  0.375  0.425  0.475
  0.525  0.575  0.625  0.675  0.725  0.775  0.825  0.875  0.925  0.975]
[-1.95996398 -1.43953147 -1.15034938 -0.93458929 -0.75541503 -0.59776013
 -0.45376219 -0.31863936 -0.18911843 -0.06270678  0.06270678  0.18911843
  0.31863936  0.45376219  0.59776013  0.75541503  0.93458929  1.15034938
  1.43953147  1.95996398]
Out[24]:
<matplotlib.image.AxesImage at 0x119fa5710>

Latent representation of training set X

In [25]:
data_set = 'train'  # 'train', 'validation', 'test'

if data_set == 'train':
    L = f_z_sample(X)
    L_init = f_z_init_sample(X)
    class_vec = Z[:].argmax(1)
elif data_set == 'validation':
    L = f_z_sample(VX)
    L_init = f_z_init_sample(VX)
    class_vec = VZ[:].argmax(1)
elif data_set == 'test':
    L = f_z_sample(TX)
    L_init = f_z_init_sample(TX)
    class_vec = TZ[:].argmax(1)
else:
    print 'unknown data set, must be one of: train, validation, test'

Select dimensions

In [39]:
dim1 = 0
dim2 = 3
In [41]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))

normal_cdf_rescale = True
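# mapping the latents through the standard normal CDF squashes each dimension into
# (0, 1); under an exact N(0, 1) marginal the rescaled points would be uniform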

if normal_cdf_rescale:
    end_points = normal_distribution.cdf(L)
    init_points = normal_distribution.cdf(L_init)
else:
    end_points = L
    init_points = L_init

axs[0].scatter(end_points[:, dim1], end_points[:, dim2], c=class_vec, lw=0, s=10, alpha=.4)
axs[1].scatter(init_points[:, dim1], init_points[:, dim2], c=class_vec, lw=0, s=10, alpha=.4)

cax = fig.add_axes([0.95, 0.2, 0.02, 0.6])
cax.scatter(np.repeat(0, 10), np.arange(10), c=np.arange(10), lw=0, s=300)
cax.set_xlim(-0.1, 0.1)
cax.set_ylim(-0.5, 9.5)
plt.yticks(np.arange(10))
plt.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off')
cax.tick_params(axis='y', colors='white')
for tick in cax.yaxis.get_major_ticks():
    tick.label.set_fontsize(14)
    tick.label.set_color('black')
    
cax.spines['bottom'].set_color('white')
cax.spines['top'].set_color('white') 
cax.spines['right'].set_color('white')
cax.spines['left'].set_color('white')

axs[0].set_title('After HMC steps')
axs[1].set_title('Initial recognition model')

if normal_cdf_rescale:
    axs[0].set_xlim(0, 1)
    axs[0].set_ylim(0, 1)
    axs[1].set_xlim(0, 1)
    axs[1].set_ylim(0, 1)
else:
    axs[0].set_xlim(-3, 3)
    axs[0].set_ylim(-3, 3)
    axs[1].set_xlim(-3, 3)
    axs[1].set_ylim(-3, 3)
In [42]:
if n_latents == 2:
    if normal_cdf_rescale:
        fig, axs = plt.subplots(1, 1, figsize=(24, 24))

        F = f_gen_rate(cast_array_to_local_type(latent_array)).astype('float32')
        img = tile_raster_images(F, image_dims, (20, 20), (1, 1))
        #axs.imshow(img, cmap=cm.nipy_spectral)
        axs.imshow(img, cmap=cm.binary)
        axs.scatter(578.0*end_points[:, dim1], 578.0*(1 - end_points[:, dim2]), c=class_vec, lw=0, s=10, alpha=0.7)

        axs.set_xlim([0, 578])
        axs.set_ylim([578, 0])
    else:
        print 'Currently only available if the scatter plot was rescaled by the normal CDF.'
        print 'Set normal_cdf_rescale=True in the previous cell.'
In [43]:
fig, axs = plt.subplots(4, 5, figsize=(20, 16))
colors = cm.jet(np.linspace(0, 1, 10))
for i in range(5):
    axs[0, i].scatter(init_points[class_vec == i, dim1], init_points[class_vec == i, dim2], c=colors[i], lw=0, s=5, alpha=.2)
    axs[1, i].scatter(end_points[class_vec == i, dim1], end_points[class_vec == i, dim2], c=colors[i], lw=0, s=5, alpha=.2)
    axs[0, i].set_title(str(i) + ' before HMC')
    axs[1, i].set_title(str(i) + ' after HMC')
    axs[2, i].scatter(init_points[class_vec == (5+i), dim1], init_points[class_vec == (5+i), dim2], c=colors[5+i], lw=0, s=5, alpha=.2)
    axs[3, i].scatter(end_points[class_vec == (5+i), dim1], end_points[class_vec == (5+i), dim2], c=colors[5+i], lw=0, s=5, alpha=.2)
    axs[2, i].set_title(str(5+i) + ' before HMC')
    axs[3, i].set_title(str(5+i) + ' after HMC')
    for j in range(4):
        if normal_cdf_rescale:
            axs[j, i].set_xlim(0, 1)
            axs[j, i].set_ylim(0, 1)
        else:
            axs[j, i].set_xlim(-3, 3)
            axs[j, i].set_ylim(-3, 3)

Sample specific visualizations

Configuration: Select sample

In [72]:
X_index = 2  # index=0 -> 5, index=1 -> 0, index=2 -> 4, index=3 -> 1, index=24 -> underlined 1, index=39 -> ugly 6
from_data_set = X
from_data_set_no_bin = X_no_bin
num_repeats = 1000

X_array = np.array([from_data_set[X_index, :]]).astype('float32')
X_array_no_bin = np.array([from_data_set_no_bin[X_index % (from_data_set_no_bin.shape[0]), :]]).astype('float32')
fig, axs = plt.subplots(1, 2, figsize=(6, 3))

img = tile_raster_images(X_array, image_dims, (1, 1), (1, 1))
axs[0].imshow(img, cmap=cm.binary)
img = tile_raster_images(X_array_no_bin, image_dims, (1, 1), (1, 1))
axs[1].imshow(img, cmap=cm.binary)
Out[72]:
<matplotlib.image.AxesImage at 0x12b695bd0>
In [73]:
repeated_X = cast_array_to_local_type(np.tile(X_array, (num_repeats, 1)))

full_sample = f_full_sample(repeated_X).astype('float32')
z_samples = full_sample[:, :num_repeats, :]
v_samples = full_sample[:, num_repeats:, :]

z_sample_mean = z_samples[:, :, :].mean(axis=1)
z_sample_std = z_samples[:, :, :].std(axis=1)

single_X = cast_array_to_local_type(X_array)
init_mean = f_z_init_mean(single_X)[0]
init_var = f_z_init_var(single_X)[0]

init_vel_var = f_v_init_var(single_X)[0]

print 'Posterior distribution statistics'
print
print 'Initial model: - Mean: ' + str(init_mean)
print '               - Var:  ' + str(init_var)
print
print 'Full HVI model: - Mean: ' + str(z_sample_mean[m.n_hmc_steps])
print '                - Var:  ' + str(z_sample_std[m.n_hmc_steps] ** 2)
print
print 'Velocity model variance: ' + str(init_vel_var)
Posterior distribution statistics

Initial model: - Mean: [ 0.89884293  1.01447845  0.33550656 -2.17000556 -0.7094202   1.44958508
  1.12196112 -1.11856937  0.43205598  1.21434093  0.48099008  0.60085404
 -1.53614604 -1.23608863 -0.1724641   0.31895682 -1.48434937  1.06526661
 -1.54501581 -1.94031179]
               - Var:  [ 0.02320246  0.05223065  0.11667001  0.04472321  0.00723788  0.03490382
  0.12999645  0.00991914  0.18848144  0.20228809  0.07652856  0.07854968
  0.13943753  0.06994063  0.02326722  0.25800544  0.07139766  0.17840792
  0.06084835  0.04471516]

Full HVI model: - Mean: [ 0.99199194  1.03670537  0.19779709 -2.2042191  -0.62533772  1.44381654
  1.26229465 -1.29891384  0.56472874  1.21335852  0.34039763  0.41977564
 -1.38681543 -1.43662226 -0.12100496  0.40160009 -1.53221965  1.04057646
 -1.36012542 -2.07729816]
                - Var:  [ 0.03138435  0.05379143  0.13641134  0.03591429  0.01255376  0.0527862
  0.17019647  0.01948533  0.22499014  0.219216    0.08616598  0.09938201
  0.163715    0.0837143   0.03325921  0.29695445  0.0903235   0.22618984
  0.12033025  0.07876816]

Velocity model variance: [ 1.00180292  0.99590409  0.99723423  0.99375516  0.99142063  1.00240815
  1.00874197  0.99035424  1.00333655  1.00458968  1.00266159  0.99721366
  1.00620472  1.00260592  1.00631106  0.99512845  0.99569142  1.00447357
  0.9997499   1.00460839]
In [74]:
mean_loss = m.score(repeated_X).mean()
print 'Mean loss for this X: ', mean_loss

single_z_evolution = z_samples[:, 0, :]

R = f_gen_rate(cast_array_to_local_type(single_z_evolution)).astype('float32')

num_steps = m.n_hmc_steps + 1
num_images = num_steps + 1
fig, axs = plt.subplots(1, num_images, figsize=(9*num_images, 9))

img = tile_raster_images(X_array, image_dims, (1, 1), (1, 1))
axs[0].imshow(img, cmap=cm.nipy_spectral)  # cmap=cm.binary

for i in range(num_steps):
    img = tile_raster_images(np.array([R[i]]), image_dims, (1, 1), (1, 1))
    axs[1 + i].imshow(img, cmap=cm.nipy_spectral)
    pot_en_of_image = f_pot_en(single_X, cast_array_to_local_type(np.array([single_z_evolution[i]])))[0]
    axs[1 + i].set_title('Pot. energy: ' + str(pot_en_of_image), fontsize=32)
Mean loss for this X:  126.632984375

Configuration: Select dimensions

In [75]:
dim1 = 0
dim2 = 3

Energy surfaces

In [76]:
number_images_per_axis = 10
resolution_per_image = 19  # must be an odd number
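# odd, so that each image tile has an exact centre grid point; the decoded images
# below are taken at these tile centres (see the modulo condition in the loop)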
x_range = .5
y_range = .5

resolution = number_images_per_axis * resolution_per_image
z_sample_final_mean = z_sample_mean[m.n_hmc_steps]
lower_dim1_limit = z_sample_final_mean[dim1] - 0.5*x_range
upper_dim1_limit = z_sample_final_mean[dim1] + 0.5*x_range
lower_dim2_limit = z_sample_final_mean[dim2] - 0.5*y_range
upper_dim2_limit = z_sample_final_mean[dim2] + 0.5*y_range

latent_array = np.zeros((number_images_per_axis**2, n_latents))

pot_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
x = np.linspace(lower_dim1_limit, upper_dim1_limit, resolution)
y = np.linspace(lower_dim2_limit, upper_dim2_limit, resolution)
for i in range(resolution):
    for j in range(resolution):
        #pos_array = f_z_init_mean(single_X)
        pos_array = np.array([z_sample_final_mean])
        pos_array[0, dim1] = x[i]
        pos_array[0, dim2] = y[j]
        pos_array_of_local_type = cast_array_to_local_type(pos_array)
        pot_energy_matrix[j, i] = f_pot_en(single_X, pos_array_of_local_type)[0]
        if i % resolution_per_image == (resolution_per_image - 1)/2 and j % resolution_per_image == (resolution_per_image - 1)/2:
            latent_array[(i//resolution_per_image) + (number_images_per_axis - 1 - j//resolution_per_image)*number_images_per_axis , :] = pos_array[0, :]

        
print 'Minimum potential energy (at grid points): ' + str(pot_energy_matrix.min())
print 'Maximum potential energy (at grid points): ' + str(pot_energy_matrix.max())

fig, axs = plt.subplots(1, 1, figsize=(15, 15))
CS = axs.contour(x, y, pot_energy_matrix, 40)
plt.clabel(CS, inline=1, fmt='%1.1f', fontsize=10, colors=None)  # colors='black'

F = f_gen_rate(cast_array_to_local_type(latent_array))
img = tile_raster_images(F, image_dims, (number_images_per_axis, number_images_per_axis), (1, 1))

axs.imshow(img, cmap=cm.binary, extent=(x.min(), x.max(), y.min(), y.max()))
axs.set_title('Potential energy surface')
plt.show()
Minimum potential energy (at grid points): 112.466
Maximum potential energy (at grid points): 116.52
In [49]:
resolution = 50
underlying_variance = f_v_init_var(single_X)
velocity_range_for_images = 10.0 * np.sqrt(underlying_variance[0, :])
lower_dim1_limit = np.around(- velocity_range_for_images[dim1])
upper_dim1_limit = np.around(  velocity_range_for_images[dim1])
lower_dim2_limit = np.around(- velocity_range_for_images[dim2])
upper_dim2_limit = np.around(  velocity_range_for_images[dim2])

kin_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
kin_x = np.linspace(lower_dim1_limit, upper_dim1_limit, resolution)
kin_y = np.linspace(lower_dim2_limit, upper_dim2_limit, resolution)
for i in range(resolution):
    for j in range(resolution):
        vel_array = np.zeros((1, m.n_latent)).astype('float32')
        vel_array[0, dim1] = kin_x[i]
        vel_array[0, dim2] = kin_y[j]
        vel_array_of_local_type = cast_array_to_local_type(vel_array)
        kin_energy_matrix[j, i] = f_kin_en(single_X, vel_array_of_local_type)

print 'Minimum kinetic energy (at grid points): ' + str(kin_energy_matrix.min())
print 'Maximum kinetic energy (at grid points): ' + str(kin_energy_matrix.max())

fig, ax = plt.subplots(1, 1, figsize=(9, 9))
CS = ax.contour(kin_x, kin_y, kin_energy_matrix)
#plt.axes().set_aspect('equal', 'datalim')
plt.clabel(CS, inline=1, fmt='%1.1f', fontsize=10)
ax.set_title('Kinetic energy surface')
plt.show()
Minimum kinetic energy (at grid points): 18.4225
Maximum kinetic energy (at grid points): 118.723
//anaconda/lib/python2.7/site-packages/matplotlib/text.py:52: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  if rotation in ('horizontal', None):
//anaconda/lib/python2.7/site-packages/matplotlib/text.py:54: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  elif rotation == 'vertical':

Evolution of recognition model distribution with HMC steps

In [78]:
fig, axs = plt.subplots(m.n_hmc_steps + 1, 3, figsize=(18, (m.n_hmc_steps + 1) * 6))
colors = cm.jet(np.linspace(0, 1, 10))

#contour_levels = (198, 200, 202, 204, 206, 208, 210)
#contour_levels = (120, 130, 140, 150, 160, 180, 200, 240, 280)
#contour_levels = (100, 102, 104, 106, 108, 110, 115, 120, 125, 130)
#contour_levels = (400, 402, 404, 406, 408, 410, 412, 416, 420)
#contour_levels = (106, 108, 110, 112, 114, 116, 118, 120, 124, 128)
#contour_levels = (160, 165, 170, 175, 180, 185, 190, 195, 200, 210, 220, 230, 240, 250, 270, 300)
#contour_levels = (174, 175, 176, 177, 178, 180, 182, 184, 186, 190, 200)
#contour_levels = (59, 61, 63, 65, 67, 69, 71, 73, 75, 80, 85, 90)
contour_levels = 40

vel_contour_levels = np.linspace(2.0, 70.0, 18)
#CS0 = axs[0, 0].contourf(x, y, pot_energy_matrix, np.linspace(155, 240, 500))
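# colour_for_z_samples (below) assigns each sample a colour index according to the
# quadrant it occupies relative to the sample mean, with a separate colour for
# samples lying almost exactly at the mean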

def colour_for_z_samples(samples):
    mean = samples.mean(axis=0)
    mean1 = mean[dim1]
    mean2 = mean[dim2]
    colour = np.zeros_like(samples[:, 0])
    colour[np.logical_and(samples[:, dim1] < mean1,  samples[:, dim2] < mean2)] = 0
    colour[np.logical_and(samples[:, dim1] < mean1,  samples[:, dim2] >= mean2)] = 2
    colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] < mean2)] = 4
    colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] >= mean2)] = 7
    colour[((samples[:, dim1] - mean1) ** 2 + (samples[:, dim2] - mean2) ** 2) < 1e-5] = 9
    return colour.astype('int32')

for i in range(m.n_hmc_steps + 1):
    CS = axs[i, 0].contour(x, y, pot_energy_matrix, contour_levels)
    plt.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
    axs[i, 0].scatter(z_samples[i,:,dim1], z_samples[i,:,dim2], c=colors[colour_for_z_samples(z_samples[i,:,:])], s=20, alpha=.3, lw=0)
    
    CS_vel = axs[i, 1].contour(kin_x, kin_y, kin_energy_matrix, vel_contour_levels)
    plt.clabel(CS_vel, inline=1, fmt='%1.1f', fontsize=10)
    axs[i, 1].scatter(v_samples[i,:,dim1], v_samples[i,:,dim2], c=colors[colour_for_z_samples(z_samples[i,:,:])], s=20, alpha=.3, lw=0)
    
    pot_energy_distrib = f_pot_en(repeated_X, cast_array_to_local_type(z_samples[i, :, :]))
    if i == 0:
        max_x_value_for_hist = pot_energy_distrib.max() + 5
        min_x_value_for_hist = np.floor(pot_energy_matrix.min()) - 5
    pot_energy_distrib_mean = pot_energy_distrib.mean()
    axs[i, 2].hist(pot_energy_distrib, 30, normed=1, range=(min_x_value_for_hist, max_x_value_for_hist))
    axs[i, 2].autoscale(enable=False, axis='both')
    axs[i, 2].axvline(pot_energy_distrib_mean, color='r', linestyle='dashed', linewidth=2)
    axs[i, 2].set_xlim(min_x_value_for_hist, max_x_value_for_hist)
    axs[i, 2].text(pot_energy_distrib_mean + 1.0, 0.8*axs[i, 2].get_ylim()[1], 'Mean: ' + str(pot_energy_distrib_mean))
    axs[i, 1].set_xlim(-velocity_range_for_images[dim1], velocity_range_for_images[dim1])
    axs[i, 1].set_ylim(-velocity_range_for_images[dim2], velocity_range_for_images[dim2])
    axs[i, 1].set_aspect('equal', 'datalim')
    axs[i, 0].set_aspect('equal', 'datalim')

axs[0, 0].scatter(f_z_init_mean(single_X)[0, dim1], f_z_init_mean(single_X)[0, dim2], c='black', s=20)

plt.show()
In [ ]:
z_pos = cast_array_to_local_type(np.array([z_sample_mean[m.n_hmc_steps]])) # [z_sample_mean[3]]

second_deriv0 = f_pot_en_2nd_deriv0(single_X, z_pos)
second_deriv1 = f_pot_en_2nd_deriv1(single_X, z_pos)
second_deriv = np.concatenate([second_deriv0, second_deriv1], axis=0)
det = np.linalg.det(second_deriv)
trace = np.trace(second_deriv)
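# this cell assumes a 2-dimensional latent space, so second_deriv is the full 2x2
# Hessian; for a 2x2 matrix the eigenvalue signs follow from det and trace:
# det < 0 means mixed signs, det >= 0 means both share the sign of the trace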

if trace >= 0 and det >= 0:
    eigvalstring = '+ +'
elif det < 0:
    eigvalstring = '+ -'
else:  # so trace < 0 and det >= 0
    eigvalstring = '- -'

print 'Potential energy:'
print f_pot_en(single_X, z_pos)
print
print '1st derivative:'
print f_pot_en_deriv(single_X, z_pos)
print
print '2nd derivative:'
print second_deriv
print
print '2nd deriv. eigenvalue signs: ', eigvalstring
In [ ]:
#debugprint(f_z1_by_z0.theano_func.theano_func)
zposss = cast_array_to_local_type(np.array([single_z_evolution[0]]))
print f_z1_by_z0(single_X, zposss, zposss)
print f_zT_by_z0(single_X, zposss, zposss)

Evolution of a single sample

In [41]:
np.random.seed(1)

velocity_noise = cast_array_to_local_type(np.random.normal(size=(m.n_hmc_steps, 1, m.n_latent)))
#velocity_noise = np.zeros_like(velocity_noise)
init_pos = f_z_init_sample(single_X) # f_z_init_mean(single_X) # + np.array([0.0, 0.1])
init_vel = f_kin_energy_sample_from_noise(single_X, velocity_noise[0])

num_vels_per_hmc = (m.n_lf_steps + 2)
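# per HMC transition we store the (partially) refreshed initial velocity plus the
# n_lf_steps + 1 velocities returned by the leapfrog integrator (half-steps and the
# final velocity), i.e. n_lf_steps + 2 entries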
position_array = np.zeros((m.n_hmc_steps * m.n_lf_steps + 1, m.n_latent))
position_array[0] = init_pos
velocity_array = np.zeros((m.n_hmc_steps * num_vels_per_hmc, m.n_latent))
velocity_array[0] = ma.assert_numpy(init_vel)

for hmc_num in range(m.n_hmc_steps):
    if hmc_num == 0:
        curr_pos = cast_array_to_local_type(init_pos)
        curr_vel = init_vel
    else:
        curr_vel = f_updated_vel_from_noise(single_X, curr_vel, velocity_noise[hmc_num])
        velocity_array[hmc_num * (m.n_lf_steps + 2)] = ma.assert_numpy(curr_vel)
    
    lf_step_results = f_perform_lf_steps(single_X, curr_pos, curr_vel)
    pos_steps = lf_step_results[:m.n_lf_steps]
    vel_half_steps_and_final = lf_step_results[m.n_lf_steps:]
    final_vel = lf_step_results[-1]
    final_pos = pos_steps[-1]
    
    position_array[hmc_num * m.n_lf_steps + 1: (hmc_num + 1)*m.n_lf_steps + 1] = ma.assert_numpy(pos_steps[:, 0, :])
    velocity_array[hmc_num * num_vels_per_hmc + 1: (hmc_num + 1) * num_vels_per_hmc] = ma.assert_numpy(vel_half_steps_and_final[:, 0, :])
    
    curr_pos = final_pos
    curr_vel = final_vel
In [42]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
step_color = cm.jet(np.linspace(0, 1, position_array.shape[0]))
CS = axs[0].contour(x, y, pot_energy_matrix, 40)
CS_vel = axs[1].contour(kin_x, kin_y, kin_energy_matrix, vel_contour_levels)
hmc_step_indices = np.arange(0, position_array.shape[0], m.n_lf_steps)
size_array = 40*np.ones((position_array.shape[0],))
size_array[hmc_step_indices] = 100
axs[0].scatter(position_array[:, dim1], position_array[:, dim2], c=step_color, lw=1, s=size_array)
axs[1].set_color_cycle(step_color)

for hmc_num in range(m.n_hmc_steps):
    curr_vel_range = np.arange(num_vels_per_hmc * hmc_num, num_vels_per_hmc * (hmc_num + 1) - 2)
    init_vel_ind = hmc_num * num_vels_per_hmc
    final_vel_ind = (hmc_num + 1) * num_vels_per_hmc - 1
    curr_index = hmc_step_indices[hmc_num]
    next_index = hmc_step_indices[hmc_num + 1]
    for j in curr_vel_range:
        axs[1].plot(velocity_array[j:j+2, dim1], velocity_array[j:j+2, dim2], lw=2)
    axs[1].scatter(velocity_array[init_vel_ind, dim1], velocity_array[init_vel_ind, dim2], c=step_color[curr_index], lw=0, s=100)
    axs[1].scatter(velocity_array[final_vel_ind, dim1], velocity_array[final_vel_ind, dim2], c=step_color[next_index], lw=0, s=100)

for hmc_num in range(m.n_hmc_steps):
    final_vel_ind = (hmc_num + 1) * num_vels_per_hmc - 1
    next_index = hmc_step_indices[hmc_num + 1]
    axs[1].plot(velocity_array[final_vel_ind-1:final_vel_ind+1, dim1], velocity_array[final_vel_ind-1:final_vel_ind+1, dim2], lw=2, c=step_color[next_index])

axs[0].set_aspect('equal', 'datalim')
axs[1].set_aspect('equal', 'datalim')

Auxiliary model for the final velocity

Inverse model r(v|x, z, t)

In [43]:
inv_model_mean_output = np.zeros((m.n_hmc_steps, m.n_latent, num_repeats, m.n_latent))
inv_model_var_output = np.zeros((m.n_hmc_steps, m.n_latent, num_repeats, m.n_latent))

for i in range(m.n_hmc_steps):
    variation_start = z_sample_mean[i] - 2*z_sample_std[i]
    variation_end = z_sample_mean[i] + 2*z_sample_std[i]
    
    time_array = cast_array_to_local_type(np.array([i]).astype('float32'))
    
    for variation_dim in range(m.n_latent):
        z_variation = np.linspace(variation_start[variation_dim], variation_end[variation_dim], num_repeats)
        sample_array = np.tile(z_sample_mean[i], (num_repeats, 1))
        sample_array[:, variation_dim] = z_variation
        local_type_sample_array = cast_array_to_local_type(sample_array)
        if 'velocity' not in m.auxiliary_inv_model_inpt:
            inv_model_mean_output[i, variation_dim] = f_aux_inv_mean(repeated_X, local_type_sample_array, time_array)
            inv_model_var_output[i, variation_dim] = f_aux_inv_var(repeated_X, local_type_sample_array, time_array)
        else:
            zero_vel_array = 0.0 * local_type_sample_array  # all-zero velocity input; avoids clobbering velocity_array from the earlier cell
            inv_model_mean_output[i, variation_dim] = f_aux_inv_mean(repeated_X, local_type_sample_array, zero_vel_array, time_array)
            inv_model_var_output[i, variation_dim] = f_aux_inv_var(repeated_X, local_type_sample_array, zero_vel_array, time_array)

Final velocity model

In [44]:
variation_start = z_sample_mean[m.n_hmc_steps] - 2*z_sample_std[m.n_hmc_steps]
variation_end = z_sample_mean[m.n_hmc_steps] + 2*z_sample_std[m.n_hmc_steps]
print variation_start
print variation_end
final_vel_model_mean_output = np.zeros((m.n_latent, num_repeats, m.n_latent))
final_vel_model_var_output = np.zeros((m.n_latent, num_repeats, m.n_latent))

for variation_dim in range(m.n_latent):
    z_variation = np.linspace(variation_start[variation_dim], variation_end[variation_dim], num_repeats)
    sample_array = np.tile(z_sample_final_mean, (num_repeats, 1))
    sample_array[:, variation_dim] = z_variation
    final_vel_model_mean_output[variation_dim] = f_v_final_mean(repeated_X, cast_array_to_local_type(sample_array))
    final_vel_model_var_output[variation_dim] = f_v_final_var(repeated_X, cast_array_to_local_type(sample_array))
[-0.4838033   0.24788277]
[-0.42754647  0.30257216]
In [45]:
fig, axs = plt.subplots(m.n_hmc_steps, 2, figsize=(18, 9*m.n_hmc_steps))

for i in range(m.n_hmc_steps - 1):
    axs[i, 0].scatter(inv_model_mean_output[i+1, :, :, dim1], 
           inv_model_mean_output[i+1, :, :, dim2],  
           c=np.transpose(np.tile(np.linspace(0, m.n_latent-1, m.n_latent), (num_repeats, 1))), 
           lw=0, s=5)
    axs[i, 1].scatter(inv_model_var_output[i+1, :, :, dim1], 
           inv_model_var_output[i+1, :, :, dim2],  
           c=np.transpose(np.tile(np.linspace(0, m.n_latent-1, m.n_latent), (num_repeats, 1))), 
           lw=0, s=5)
    axs[i, 0].set_title('Aux. inv. model ' + str(i+1) + ': Mean')
    axs[i, 1].set_title('Aux. inv. model ' + str(i+1) + ': Variance')
        
if m.n_hmc_steps == 1:
    axs[0].scatter(final_vel_model_mean_output[:, :, dim1], 
               final_vel_model_mean_output[:, :, dim2],  
               c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))), 
               lw=0, s=5)
    axs[1].scatter(final_vel_model_var_output[:, :, dim1], 
               final_vel_model_var_output[:, :, dim2],  
               c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))), 
               lw=0, s=5)
    axs[0].set_title('Final vel. model: Mean')
    axs[1].set_title('Final vel. model: Variance')
else:
    axs[m.n_hmc_steps-1, 0].scatter(final_vel_model_mean_output[:, :, dim1], 
               final_vel_model_mean_output[:, :, dim2],  
               c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))), 
               lw=0, s=5)
    axs[m.n_hmc_steps-1, 1].scatter(final_vel_model_var_output[:, :, dim1], 
               final_vel_model_var_output[:, :, dim2],  
               c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))), 
               lw=0, s=5)
    axs[m.n_hmc_steps-1, 0].set_title('Final vel. model: Mean')
    axs[m.n_hmc_steps-1, 1].set_title('Final vel. model: Variance')

plt.show()
In [46]:
print v_samples[0, :, :].mean(axis=0)
print v_samples[1, :, :].mean(axis=0)
print v_samples[2, :, :].mean(axis=0)
print v_samples[3, :, :].mean(axis=0)
print
print v_samples[0, :, :].var(axis=0)
print v_samples[1, :, :].var(axis=0)
print v_samples[2, :, :].var(axis=0)
print v_samples[3, :, :].var(axis=0)
[-0.00794157 -0.00997367]
[ 1.59714448 -1.71224022]
[-1.63277793  2.17609811]
[ 1.33174348 -1.57673645]

[ 0.40558866  0.50626177]
[ 0.77036244  1.33797956]
[ 0.7405386   1.92231333]
[ 0.75407332  1.17725432]
In [ ]:
final_z_samples = cast_array_to_local_type(z_samples[m.n_hmc_steps, :, :])
final_v_samples = cast_array_to_local_type(v_samples[m.n_hmc_steps, :, :])
final_vel_mean = f_v_final_mean(repeated_X, final_z_samples)
final_vel_var = f_v_final_var(repeated_X, final_z_samples)
final_vel_nll = f_v_final_model_nll(repeated_X, final_z_samples, final_v_samples)
In [ ]:
print f_kin_energy_nll(single_X).sum(-1)

print final_vel_nll.mean()
print final_vel_nll.min()
print final_vel_nll.max()
In [ ]:
#m.parameters.view(m.auxiliary_inv_model.mlp.layers[2].weights)
print m.parameters._var_to_slice[m.auxiliary_inv_model.mlp.layers[2].weights]
#grad[594190:594990]
#old_weights_array = m.parameters.view(m.auxiliary_inv_model.mlp.layers[0].weights)
#new_array = old_weights_array.copy()
#new_array[-1, :] = 1.0 + old_weights_array[-1, :]
#m.parameters.__setitem__(m.auxiliary_inv_model.mlp.layers[0].weights, new_array)
print m.parameters.view(m.auxiliary_inv_model.mlp.layers[2].weights).shape
In [ ]:
np.set_printoptions(suppress=True)
print np.around(np.reshape(mean_grad[396390:553790], (787, 200)), 5)[:198]  # 396390:553790
np.reshape(mean_grad[594190:594990], (200, 4))  # 396390:553790
#print np.around(np.reshape(m.parameters.data[396390:553790].astype('float32'), (787, 200)), 5)[:50]

Old snippets

In [ ]:
fig, axs = plt.subplots(4, 2, figsize=(18, 36))
# TODO: Analysis of how final_vel_mean and final_vel_var depend on z (since they all share the same x)

print z_samples[3, :, :].mean(axis=0)
print z_samples[3, :, :].var(axis=0)
print v_samples[3, :, :].mean(axis=0)
print v_samples[3, :, :].var(axis=0)
print f_v_init_var(np.array([X[X_index, :]]))

print final_vel_nll.mean()
plt.boxplot(final_vel_nll, whis=1)
plt.show()
In [ ]:
centers = np.zeros((10, n_latents))
stddevs = np.zeros((10, n_latents))
centers_init = np.zeros((10, n_latents))
stddevs_init = np.zeros((10, n_latents))
for i in range(10):
    Li = f_z_sample(X[Z.argmax(1) == i])
    centers[i] = Li.mean(axis=0)
    stddevs[i] = np.std(Li, axis=0)
    
    Li_init = f_z_init_sample(X[Z.argmax(1) == i])
    centers_init[i] = Li_init.mean(axis=0)
    stddevs_init[i] = np.std(Li_init, axis=0)
In [ ]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[0].scatter(centers_init[:, dim1], centers_init[:, dim2], c=range(10), s=50, marker=u's')

axs[1].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[1].scatter(centers[:, dim1] + stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'>')
axs[1].scatter(centers[:, dim1] - stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'<')
axs[1].scatter(centers[:, dim1], centers[:, dim2] + stddevs[:, dim2], c=range(10), s=50, marker=u'^')
axs[1].scatter(centers[:, dim1], centers[:, dim2] - stddevs[:, dim2], c=range(10), s=50, marker=u'v')

#axs[0].set_xlim(-1.2, 1.2)
#axs[0].set_ylim(-1.2, 1.2)
#axs[1].set_xlim(-1.2, 1.2)
#axs[1].set_ylim(-1.2, 1.2)

print (centers[:, dim1] - centers_init[:, dim1])
print (centers[:, dim2] - centers_init[:, dim2])
print (stddevs[:, dim1] - stddevs_init[:, dim1])
print (stddevs[:, dim2] - stddevs_init[:, dim2])
In [ ]: